##Kumar, Tanu, Alison E. Post and Isha Ray. 2018.  "Flows, leaks, and blockages in informational interventions: A field experimental study of Bangalore's water sector." World Development.##  

#####REPLICATION CODE######
rm(list=ls())
setwd("/Users/Tanu/Dropbox/DIL Bangalore Project/Impact Evaluation/Final Submission/Replication") #Set WD to the folder in which the data is stored.
library('ivpack')
library('AER')
library('lmtest')
library('ri')
library('multiwayvcov')
library('tables')
set.seed(12345)

##load in main dataset. other datasets will be loaded in as required.###
full=read.csv("Data.csv")

#######################
#Coding Variables####
#Must always run first#
#######################

full$Q204_storagetankyesno[full$Q204_storagetankyesno==2]=0 #recode "no" as 0 instead of 2
full$Q204_storagetankyesno[is.na(full$Q204_storagetankyesno)]=0 # because of the skip logic, those who do not answer the question are assumed not to wait for water #don't knows are classified as NA


full$Q215_decimal_waterwait.x[is.na(full$Q215_decimal_waterwait.x)]=0 # because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q215_decimal_waterwait.x[full$Q215_decimal_waterwait.x>23]=NA #don't knows and impossible outliers are classified as NA
full$Q215_decimal_waterwait.y[is.na(full$Q215_decimal_waterwait.y)]=0# because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q215_decimal_waterwait.y[full$Q215_decimal_waterwait.y>23]=NA #don't knows and impossible outliers are classified as NA

full$Q217_hoursmissed.x[is.na(full$Q217_hoursmissed.x)]=0 # because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q217_hoursmissed.x[full$Q217_hoursmissed.x==999 |full$Q217_hoursmissed.x==777]=NA #don't knows are classified as NA
full$Q217_hoursmissed.y[is.na(full$Q217_hoursmissed.y)]=0 # because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q217_hoursmissed.y[full$Q217_hoursmissed.y==999 |full$Q217_hoursmissed.y==777]=NA #don't knows are classified as NA


full$Q219_missfunctions.x[is.na(full$Q219_missfunctions.x)]=2 # because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q219_missfunctions.x[full$Q219_missfunctions.x==2]=0  #recode "no" as 0 instead of 2
full$Q219_missfunctions.x[full$Q219_missfunctions.x>2]=NA  #don't knows are classified as NA
full$Q219_missfunctions.y[is.na(full$Q219_missfunctions.y)]=2 # because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q219_missfunctions.y[full$Q219_missfunctions.y==2]=0 #recode "no" as 0 instead of 2
full$Q219_missfunctions.y[full$Q219_missfunctions.y>2]=NA #don't knows are classified as NA

full$Q220_waterstorage[is.na(full$Q220_waterstorage)]=0 # because of the skip logic, those who do not answer the question are assumed not to wait for water
full$Q220_waterstorage[full$Q220_waterstorage==1]=0 #based on question wording, a 1 means substitutes are not needed
full$Q220_waterstorage[full$Q220_waterstorage==2]=1 #based on question wording, a 2 means subsitutes are needed

full$Q234_worryaboutwater.x[full$Q234_worryaboutwater.x==999]=NA  #don't knows are classified as NA
full$Q234_worryaboutwater.y[full$Q234_worryaboutwater.y==999]=NA  #don't knows are classified as NA

full$Q235_thinkaboutwater.x[full$Q235_thinkaboutwater.x==999]=NA  #don't knows are classified as NA
full$Q235_thinkaboutwater.y[full$Q235_thinkaboutwater.y==999]=NA #don't knows are classified as NA

full$Q236_worryaboutlate.x[full$Q235_thinkaboutwater.x==999]=NA  #don't knows are classified as NA
full$Q236_worryaboutlate.y[full$Q235_thinkaboutwater.y==999]=NA  #don't knows are classified as NA


full$Q249_BWSSBagree.x[full$Q249_BWSSBagree.x==999]=2 #don't knows are classified as not sure
full$Q249_BWSSBagree.y[full$Q249_BWSSBagree.y==999]=2 #don't knows are classified as not sure

full$Q250_BWSSBagree.x[full$Q250_BWSSBagree.x==999]=2 #don't knows are classified as not sure
full$Q250_BWSSBagree.y[full$Q250_BWSSBagree.y==999]=2 #don't knows are classified as not sure

full$Q251_BWSSBagree.x[full$Q251_BWSSBagree.x==999]=2 #don't knows are classified as not sure
full$Q251_BWSSBagree.y[full$Q251_BWSSBagree.y==999]=2 #don't knows are classified as not sure

full$Q258_int_leaders.x[full$Q258_int_leaders.x==999]=0

colnames(full)=c("X", "HHid", "kaveri","Q104_state", "tank", "refill","supply","Q215w1","Q217w1","Q219w1", "Q220w1","Q234w1","Q235w1", "Q236w1", "Q238_2w1", "Q244_2w1",                      
                 "Q249w1", "Q250w1", "Q251w1", "Q258_int_leaders.x","Q302_integer_numpplhousehold.x","Q307_familyreligion","Q308_category",                 
                 "Q402_roof_type", "q403_4","Q405_vehicles","Q406_have_car", "income","Q501", "gps_location_1.Altitude.x","gps_location_1.Accuracy.x",     
                "gps_location_2.Altitude.x", "gps_location_2.Accuracy.x","gps_location_3.Altitude","gps_location_3.Accuracy",      
                "Q215w2","Q217w2","Q219w2", "Q220bw2","Q234w2","Q235w2", "Q236w2",  "Q238_2w2", "Q244_2w2", "Q249w2", "Q250w2", "Q251w2", "Q505_Ndmessages","Q505b_SMSfreq","Q506d_SMSacc", "block","bl_cl","treat" )                        

##create variables

full$low=ifelse(full$income<3,1,0)#Low income if you are in the first two income brackets
full$kav=ifelse((full$kaveri==1 |full$kaveri==2),1,0) #people who get piped kaveri water are 1
full$everyday=ifelse(full$supply==1,1,0) #people who get water supply every day
full$twofour=ifelse((full$supply>1 & full$supply<4),1,0)#people who get water supply 2-4 days
full$noreg=ifelse(full$supply<4,1,0)# people who get water without regularity
full$tank2=ifelse(full$tank<4,1,0)# people with a tank or sump


#drop block 22- see Pre-Analysis Plan for why we dropped this whole block
full=subset(full, !(full$block==22))




#################################
#Table 1: Conditions at Baseline#
#################################

#collect relevant variables

d=full[,c('Q215w1',
          'Q219w1', 
          'Q217w1',
          'Q220w1', 
          'Q234w1', 
          'Q235w1',         
          'Q249w1',
          'Q250w1',
          'Q251w1', 
          'Q238_2w1','Q244_2w1', 
          'low', 'kav','everyday',
          'twofour', "noreg", "tank2", "refill", "block")]


table=as.data.frame(matrix(nrow=ncol(d), ncol=3))
i=1
for (i in 1:ncol(d)){
  table[i,1]=mean(d[,i], na.rm=TRUE)
  table[i,2]=mean(d[d$block<11,i], na.rm=TRUE)
  table[i,3]=mean(d[d$low==1 &(d$tank2==0 | d$refill==0),i], na.rm=TRUE)
  rownames(table[i,1])=colnames(d[,i])
}
table=cbind(colnames(d), table)
colnames(table)=c("variable", "population.mean","low.income.block.mean","target.pop.mean")

#GENERATE OUTPUT#
table[1:11,]

##############################################################
#Table 2: Intent to treat estimates with covariate adjustment#
##############################################################
#Please run this entire section as a whole. Only when you reach 
#the line that says "tab" will the table print
perms=genperms(Z=full$treat, blockvar=full$block, clustvar=full$bl_cl, maxiter=1000) #create set of 1000 possible permutations of how treatment could have been assigned


d=as.data.frame(cbind(full, perms)) #create a dataset with the the treatment permutations bound to it
 

#create function for generating table rows. Note that different functions need to be created for rows 1, 4,5,and 6. See table footnotes as to why.

#creates row 1
table=function(one, two){
  options(warn=-1) #suppress warnings
  d$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  d$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  d$Y=d$w2
  d=d[complete.cases(d[,c(1058,1059)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables. 
  
  ate1=estate(Y=d$Y, Z=d$treat,X=as.matrix(cbind.data.frame(d$w1,d$low,d$kav,d$twofour,d$noreg,d$tank2))) #calculate ATE, covariate adjusted
  gen=as.matrix(d[,(58:1057)]) #select the 1000 columns with the treatment permutations
  h.ate=genouts(Y=d$Y, Z=d$treat,  ate=0) #create  a treatment effect of 0
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(d$w1,d$low,d$kav,d$twofour,d$noreg,d$tank2))) #create 1000 potential outcomes with a treatment effect of 0
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975)) #get p value
  p = p$two.tailed.p.value 
  cm1=mean(d$w2[d$treat==0], na.rm=TRUE) #get control mean
  a_=c(cm1, ate1, p,1) #get baseline mean, ate, and p. add placeholder for multiple testing adjusted p
  
  bl_1=subset(d, d$block<11)#low income blocks
  bl_1$Y=bl_1$w2-bl_1$w1
  ate1=estate(Y=bl_1$Y, Z=bl_1$treat,X=as.matrix(cbind.data.frame(bl_1$w1,bl_1$low,bl_1$kav,bl_1$twofour,bl_1$noreg,bl_1$tank2))) 
  gen=as.matrix(bl_1[,(58:1057)])
  h.ate=genouts(Y=bl_1$Y, Z=bl_1$treat,  ate=0) 
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(bl_1$w1, bl_1$low,bl_1$kav,bl_1$twofour,bl_1$noreg,bl_1$tank2))) 
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(d, d$income<3 & (d$tank==4| d$refill==0)) #target population. low income without tanks or without automatically refilling tanks
  ta_1$Y=ta_1$w2-ta_1$w1
  ate1=estate(Y=ta_1$Y, Z=ta_1$treat,X=as.matrix(cbind.data.frame(ta_1$w1, ta_1$kav,ta_1$twofour,ta_1$noreg))) 
  gen=as.matrix(ta_1[,(58:1057)])
  h.ate=genouts(Y=ta_1$Y, Z=ta_1$treat,  ate=0) 
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(ta_1$w1, ta_1$kav,ta_1$twofour,ta_1$noreg))) 
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.975))
  p = p$two.tailed.p.value
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)

  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean","overall.ate","p1","p1_","low.income.block.ctrlmean","low.income.block.ate","p2","p2_","targ.ctrlmean","targ.ate","p3","p3_")  
  rownames(out)=c(" ")
  return(out)
}

#this function includes wait time as a covariate. Creates rows 2, 3, and 7-11
table1=function(one, two){
  options(warn=-1) #suppress warnings
  d$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  d$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  d$Y=d$w2
  d=d[complete.cases(d[,c(1058,1059)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables. 
  
  ate1=estate(Y=d$Y, Z=d$treat,X=as.matrix(cbind.data.frame(d$w1,d$low,d$kav,d$twofour,d$noreg,d$tank2, d$Q215w1))) #calculate ATE, covariate adjusted
  gen=as.matrix(d[,(58:1057)]) #select the 1000 columns with the treatment permutations
  h.ate=genouts(Y=d$Y, Z=d$treat,  ate=0) #create  a treatment effect of 0
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(d$w1,d$low,d$kav,d$twofour,d$noreg,d$tank2, d$Q215w1))) #create 1000 potential outcomes with a treatment effect of 0
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975)) #get p value
  p = p$two.tailed.p.value 
  cm1=mean(d$w2[d$treat==0], na.rm=TRUE) #get control mean
  a_=c(cm1, ate1, p,1) #get baseline mean, ate, and p. include placeholder for adjusted p value
  
  bl_1=subset(d, d$block<11)#low income blocks
  bl_1$Y=bl_1$w2-bl_1$w1
  ate1=estate(Y=bl_1$Y, Z=bl_1$treat,X=as.matrix(cbind.data.frame(bl_1$w1,bl_1$low,bl_1$kav,bl_1$twofour,bl_1$noreg,bl_1$tank2, bl_1$Q215w1))) 
  gen=as.matrix(bl_1[,(58:1057)])
  h.ate=genouts(Y=bl_1$Y, Z=bl_1$treat,  ate=0) 
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(bl_1$w1, bl_1$low,bl_1$kav,bl_1$twofour,bl_1$noreg,bl_1$tank2, bl_1$Q215w1))) 
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(d, d$income<3 & (d$tank==4| d$refill==0)) #target population. low income without tanks or without automatically refilling tanks
  ta_1$Y=ta_1$w2-ta_1$w1
  ate1=estate(Y=ta_1$Y, Z=ta_1$treat,X=as.matrix(cbind.data.frame(ta_1$w1, ta_1$kav,ta_1$twofour,ta_1$noreg, ta_1$Q215w1))) 
  gen=as.matrix(ta_1[,(58:1057)])
  h.ate=genouts(Y=ta_1$Y, Z=ta_1$treat,  ate=0) 
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(ta_1$w1, ta_1$kav,ta_1$twofour,ta_1$noreg, ta_1$Q215w1))) 
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_") 
  rownames(out)=c(" ")
  return(out)
}

#this function calculates single tailed p-values. creates rows 4, 5, and 6
table2=function(one, two){
  options(warn=-1) #suppress warnings
  d$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  d$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  d$Y=d$w2
  d=d[complete.cases(d[,c(1058,1059)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables. 
  
  ate1=estate(Y=d$Y, Z=d$treat,X=as.matrix(cbind.data.frame(d$w1,d$low,d$kav,d$twofour,d$noreg,d$tank2, d$Q215w1))) #calculate ATE, covariate adjusted
  gen=as.matrix(d[,(58:1057)]) #select the 1000 columns with the treatment permutations
  h.ate=genouts(Y=d$Y, Z=d$treat,  ate=0) #create  a treatment effect of 0
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(d$w1,d$low,d$kav,d$twofour,d$noreg,d$tank2, d$Q215w1))) #create 1000 potential outcomes with a treatment effect of 0
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975)) #get p value
  p = p$two.tailed.p.value/2
  cm1=mean(d$w2[d$treat==0], na.rm=TRUE) #get control mean
  a_=c(cm1, ate1, p, 1) #get baseline mean, ate, and p
  
  bl_1=subset(d, d$block<11)#low income blocks
  bl_1$Y=bl_1$w2-bl_1$w1
  ate1=estate(Y=bl_1$Y, Z=bl_1$treat,X=as.matrix(cbind.data.frame(bl_1$w1,bl_1$low,bl_1$kav,bl_1$twofour,bl_1$noreg,bl_1$tank2, bl_1$Q215w1))) 
  gen=as.matrix(bl_1[,(58:1057)])
  h.ate=genouts(Y=bl_1$Y, Z=bl_1$treat,  ate=0) 
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(bl_1$w1, bl_1$low,bl_1$kav,bl_1$twofour,bl_1$noreg,bl_1$tank2, bl_1$Q215w1))) 
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value/2
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p, 1)
  
  ta_1=subset(d, d$income<3 & (d$tank==4| d$refill==0)) #target population. low income without tanks or without automatically refilling tanks
  ta_1$Y=ta_1$w2-ta_1$w1
  ate1=estate(Y=ta_1$Y, Z=ta_1$treat,X=as.matrix(cbind.data.frame(ta_1$w1, ta_1$kav,ta_1$twofour,ta_1$noreg, ta_1$Q215w1))) 
  gen=as.matrix(ta_1[,(58:1057)])
  h.ate=genouts(Y=ta_1$Y, Z=ta_1$treat,  ate=0) 
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL, X=as.matrix(cbind.data.frame(ta_1$w1, ta_1$kav,ta_1$twofour,ta_1$noreg, ta_1$Q215w1))) 
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value/2
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p, 1)
  
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
  rownames(out)=c(" ")
  return(out)
}





#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table(d$Q215w1, d$Q215w2))
#row 2: missing community events
b=data.frame(table1(d$Q219w1, d$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(d$Q217w1, d$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(d$Q220w1, d$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(d$Q234w1, d$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(d$Q235w1, d$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(d$Q249w1, d$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(d$Q250w1, d$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(d$Q251w1, d$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(d$Q238_2w1, d$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(d$Q244_2w1, d$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1_=p.all[,1]
tab$p2_=p.all[,2]
tab$p3_=p.all[,3]
tab

##N##
#row 1, column 1#
nrow(d)
#row 2, column 1#
sum(d$treat)
#row 1, column 2#
nrow(d[d$block<11,])
#row 2, column 2#
sum(d$treat[d$block<11])
#row 1, column 3#
nrow(d[ d$income<3 & (d$tank==4|d$refill==0),])
#row 2, column 3#
sum(d$treat[d$income<3 & (d$tank==4|d$refill==0)])

###########################################################################################
#Table 3. CACE for Households Receiving Accurate Notifications (with covariate adjustment)#
###########################################################################################

#Please run this entire section as a whole. Only when you reach 
#the line that says "tab" will the table print"

#drop people who do not sign up. this includes some control group members
comp=subset(full, full$Q501==1)


#find compliers (those who received messages in the treatment group)
comp$compliance=ifelse((comp$Q505_Ndmessages==1 & !(comp$Q505b_SMSfreq==1) & (comp$Q506d_SMSacc<3)),1,0) #people who receive notifications and receive them NOT rarely AND say they are accurate


#create function for generating table rows. Note that different functions need to be created for rows 1, 4,5,and 6. See table footnotes# SEE COMMENTS FOR TABLE 3 ON THIS


#creates row 1

table=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  comp$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  comp$Y=comp$w2 

  all=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+as.factor(block)+w1, 
            ~treat+low+kav+everyday+twofour+noreg+tank2+as.factor(block)+w1, data=comp)  
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4]) 
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+as.factor(block)+w1,
                ~treat+low+kav+twofour+noreg+tank2+as.factor(block)+w1, data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4])
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4| comp$refill==0))
  sub2reg=ivreg(Y~compliance+kav+twofour+noreg+as.factor(block)+w1,
                ~treat+kav+twofour+noreg+as.factor(block)+w1, data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4])
  
  out=t(as.data.frame(c(a_,b_,c_)))
  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}



#this function includes wait time as a covariate. Creates rows 2, 3, and 7-11
table1=function(one, two){ 
  options(warn=-1) #suppress warnings
  comp$w1=one 
  comp$w2=two 
  comp$Y=comp$w2
  all=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
            ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4])
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
                ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4])
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4| comp$refill==0))
  sub2reg=ivreg(Y~compliance+kav+twofour+noreg+Q215w1+as.factor(block)+w1,
                ~treat+kav+twofour+noreg+Q215w1+as.factor(block)+w1, data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4]) 
  out=t(as.data.frame(c(a_,b_,c_))) 
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}


#this function calculates single tailed p-values. creates rows 4, 5, and 6
table2=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one 
  comp$w2=two 
  comp$Y=comp$w2
  
  all=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
            ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4]/2)
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
                ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4]/2)
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4| comp$refill==0))
  sub2reg=ivreg(Y~compliance+kav+twofour+noreg+Q215w1+as.factor(block)+w1,
                ~treat+kav+twofour+noreg+Q215w1+as.factor(block)+w1, data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4]/2) 
  out=t(as.data.frame(c(a_,b_,c_)))  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}



#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table(comp$Q215w1, comp$Q215w2))
#row 2: missing community events
b=data.frame(table1(comp$Q219w1, comp$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(comp$Q217w1, comp$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(comp$Q220w1, comp$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(comp$Q234w1, comp$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(comp$Q235w1, comp$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(comp$Q249w1, comp$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(comp$Q250w1, comp$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(comp$Q251w1, comp$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(comp$Q238_2w1, comp$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(comp$Q244_2w1, comp$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1=p.all[,1]
tab$p2=p.all[,2]
tab$p3=p.all[,3]
tab

##N###
#row 1, column 1#
nrow(comp)
#row 2, column 1#
sum(comp$treat)
#row 3, column 1#
sum(comp$compliance[comp$treat==1])

#row 1, column 2#
nrow(comp[comp$block<11,])
#row 2, column 2#
sum(comp$treat[comp$block<11])
#row 3, column 2#
sum(comp$compliance[comp$block<11 & comp$treat==1])

#row 1, column 3#
nrow(comp[ comp$income<3 & (comp$tank==4|comp$refill==0),])
#row 2, column 3#
sum(comp$treat[comp$income<3 & (comp$tank==4|comp$refill==0)])
#row 3, column 3#
sum(comp$compliance[comp$income<3 & (comp$tank==4|comp$refill==0) & comp$treat==1])




##################################################################################################################
#Figure A.2. Histogram of discrepancies between household reports of water arrival times and valveman report data#
##################################################################################################################

#load in data about discrepancies between VM reports and survey responses. Code for this data available upon request.

v=read.csv("Accuracy.csv")
#GENERATE OUTPUT#
hist(v$Q209Discrp, breaks=20, ylim=c(0,200))



####################################################
#Table A.1. Variable Definitions and Characteristics
####################################################

#collect relevant variables

d=full[,c('Q215w1',
          'Q219w1', 
          'Q217w1',
          'Q220w1', 
          'Q234w1', 
          'Q235w1',         
          'Q249w1',
          'Q250w1',
          'Q251w1', 
          'Q238_2w1','Q244_2w1', 
          'low', 'kav','everyday',
          'twofour', "noreg", "tank2", "refill")]


table=as.data.frame(matrix(nrow=ncol(d), ncol=4))
for (i in 1:ncol(d)){
  table[i,1]=mean(d[,i], na.rm=TRUE)
  table[i,2]=max(d[,i], na.rm=TRUE)
  table[i,3]=min(d[,i], na.rm=TRUE)
  table[i,4]=sd(d[,i], na.rm=TRUE)
  rownames(table[i,1])=colnames(d[,i])
}
table=cbind(colnames(d), table)
#reorder to reflect order in table
colnames(table)=c("variable", "mean","max","min","sd")

#GENERATE OUTPUT#
table #view



#######################################
#Table A.2. Baseline Covariate Balance#
#######################################

##CREATE VARIABLES FOR BALANCE ASSESSMENT

#create a measure of altitude for each observation
full$gps_location_1.Altitude.x[full$gps_location_1.Accuracy.x>5]=NA #drop altitude observations with less than 5 m accuracy
full$gps_location_2.Altitude.x[full$gps_location_2.Accuracy.x>5]=NA
full$gps_location_3.Altitude[full$gps_location_3.Accuracy>5]=NA
alt=data.frame(cbind(full$gps_location_1.Altitude.x, full$gps_location_2.Altitude.x,full$gps_location_3.Altitude))
altavg=data.frame(rowMeans(alt, na.rm = TRUE)) #creating an average



#create a measure of belonging in an outgroup- those who are muslim or lower caste
out=ifelse((full$Q307_familyreligion==2 |full$Q308_category==3|full$Q308_category==4),1,0)

#indicator for having load bearing roof, motorized vehicle, and fridge
assets=ifelse((full$Q402_roof_type==1 & (full$Q405_vehicles>0|full$Q406_have_car==1)& full$q403_4==1),1,0)

#indicator for being from Karnataka
karnat=ifelse(full$Q104_state=='Karnataka',1,0)

#collect variables of interest
d=full[,c( 'Q302_integer_numpplhousehold.x',  'low', 
          'kav', 'tank2', 'everyday', 'twofour', 'noreg', 'Q215w2',
          'Q258_int_leaders.x', 'treat',"block", 'bl_cl')]

hist(as.numeric(d[,7]))
table(as.numeric(d[,7]))
colnames(d)
d=cbind.data.frame(altavg, d, out, assets, karnat) #bind new variables
colnames(d)
d=d[c(1,2,16,14,3,15,4,5,6,7,8,9,10,11,12,13)]#reorder to reflect order in table

perms=genperms(Z=d$treat, blockvar=d$block, clustvar=d$bl_cl, maxiter=10000) #using randomization inference to assess balance. generates treatment permutations

d=cbind.data.frame(d, perms)
bal=as.data.frame(matrix(ncol=4,nrow=13))


for (i in 1:13){
  bal[i,1]=mean(d[d$treat==0,i], na.rm=TRUE)
  bal[i,2]=mean(d[d$treat==1,i], na.rm=TRUE)
  ate1=estate(Y=na.omit(d[,i]), Z=d$treat[is.na(d[,i])==FALSE])
  gen=as.matrix(d[complete.cases(d[,i]),(17:10016)])
  h.ate=genouts(Y=na.omit(d[,i]), Z=d$treat[is.na(d[,i])==FALSE], ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  bal[i,3]=p$two.tailed.p.value
  bal[i,4]=ks.test(d[d$treat==0,i], d[d$treat==1,i])$p.value
}

#GENERATE OUTPUT#
bal=cbind(colnames(d)[1:13], bal)
bal
#####################################################################
#Table A.3. Intent-to-Treat Estimates (without covariate adjustment)#
#####################################################################
#NOTE: P-values may not match those in the paper exactly because of the use of permutation inference

perms=genperms(Z=full$treat, blockvar=full$block, clustvar=full$bl_cl, maxiter=1000) #create set of 1000 possible permutations of how treatment could have been assigned


d=as.data.frame(cbind(full, perms)) #create a dataset with the the treatment permutations bound to it


#create function for generating table rows. 

#creates rows 1-3, 7-11
table1=function(one, two){
  options(warn=-1) #suppress warnings
  d$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  d$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
 
  d=d[complete.cases(d[,c(1060,1061)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables. 
   
  Y= d$w2
  ate1=estate(Y=Y, Z=d$treat) #calculate ATE
  gen=as.matrix(d[,(60:1059)]) #select the 1000 columns with the treatment permutations
  h.ate=genouts(Y=Y, Z=d$treat, ate=0) #create a null hypothesis treatment effect of 0
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL) #create 1000 potential outcomes with a treatment effect of 0
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975)) 
  p = p$two.tailed.p.value #get p values
  cm1=mean(d$w2[d$treat==0], na.rm=TRUE) #get baseline mean
  a_=c(cm1, ate1, p,1) #get baseline mean, ate, and p
  
  bl_1=subset(d, d$block<11) #low income blocks
  Y=bl_1$w2
  ate1=estate(Y=Y, Z=bl_1$treat)
  gen=as.matrix(bl_1[,(60:1059)])
  h.ate=genouts(Y=Y, Z=bl_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(d, d$income<3 &  (d$tank==4| d$refill==0)) #target population. low income without tanks or without automatically refilling tanks
  Y=ta_1$w2
  ate1=estate(Y=Y, Z=ta_1$treat)
  gen=as.matrix(ta_1[,(60:1059)])
  h.ate=genouts(Y=Y, Z=ta_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  
out=t(as.data.frame(c(a_,b_,c_)))
colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
rownames(out)=c(" ")
return(out)
}



#creates rows 4-6. one tailed p-values
table2=function(one, two){
  options(warn=-1) #suppress warnings
  d$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  d$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  
  d=d[complete.cases(d[,c(1060,1061)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables.  
  Y= d$w2
  ate1=estate(Y=Y, Z=d$treat)
  gen=as.matrix(d[,(60:1059)])
  h.ate=genouts(Y=Y, Z=d$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm1=mean(d$w2[d$treat==0], na.rm=TRUE)
  a_=c(cm1, ate1, p,1)
  
  bl_1=subset(d, d$block<11)
  Y=bl_1$w2
  ate1=estate(Y=Y, Z=bl_1$treat)
  gen=as.matrix(bl_1[,(60:1059)])
  h.ate=genouts(Y=Y, Z=bl_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(d, d$income<3 & (d$tank==4| d$refill==0))
  Y=ta_1$w2
  ate1=estate(Y=Y, Z=ta_1$treat)
  gen=as.matrix(ta_1[,(60:1059)])
  h.ate=genouts(Y=Y, Z=ta_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
  return(out)
  rownames(out)=c(" ")
}




#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table1(d$Q215w1, d$Q215w2))
#row 2: missing community events
b=data.frame(table1(d$Q219w1, d$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(d$Q217w1, d$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(d$Q220w1, d$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(d$Q234w1, d$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(d$Q235w1, d$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(d$Q249w1, d$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(d$Q250w1, d$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(d$Q251w1, d$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(d$Q238_2w1, d$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(d$Q244_2w1, d$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1_=p.all[,1]
tab$p2_=p.all[,2]
tab$p3_=p.all[,3]
tab



##N###
#row 1, column 1#
nrow(d)
#row 2, column 1#
sum(d$treat)
#row 1, column 2#
nrow(d[d$block<11,])
#row 2, column 2#
sum(d$treat[d$block<11])
#row 1, column 3#
nrow(d[ d$income<3 & (d$tank==4|d$refill==0),])
#row 2, column 3#
sum(d$treat[d$income<3 & (d$tank==4|d$refill==0)])
#############################################################################
#Table A. 4. CACE for All Households Receiving Notifications (no covariates)#
#############################################################################




#drop people who do not sign up.
comp=subset(full, full$Q501==1)

#find compliers
comp$compliance=ifelse((comp$Q505_Ndmessages==1 & !(comp$Q505b_SMSfreq==1)),1,0) #people who receive notifications and receive them NOT rarely
comp$compliance[is.na(comp$compliance)]=0 #we had one obs for which this was NA. not sure why- will need to dig in


#creates rows 1-3, 7-11
table1=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  comp$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  comp$Y=comp$w2-comp$w1 #create difference in differences outcome
  Y=comp$Y
  all=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4])
  

  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~sub1$compliance+as.factor(block),~treat+as.factor(block), data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4])
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4 | comp$refill==0))
  sub2reg=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4])
  out=t(as.data.frame(c(a_,b_,c_)))  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}


#creates rows 1-3, 7-11
table2=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  comp$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  comp$Y=comp$w2-comp$w1 #create difference in differences outcome
  Y=comp$Y
  all=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4]/2)
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4]/2)
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4 | comp$refill==0))
  sub2reg=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4]/2)
  out=t(as.data.frame(c(a_,b_,c_)))  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}



#GENERATE OUTPUT#

#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table1(comp$Q215w1, comp$Q215w2))
#row 2: missing community events
b=data.frame(table1(comp$Q219w1, comp$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(comp$Q217w1, comp$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(comp$Q220w1, comp$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(comp$Q234w1, comp$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(comp$Q235w1, comp$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(comp$Q249w1, comp$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(comp$Q250w1, comp$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(comp$Q251w1, comp$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(comp$Q238_2w1, comp$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(comp$Q244_2w1, comp$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1=p.all[,1]
tab$p2=p.all[,2]
tab$p3=p.all[,3]
tab

##N###
#row 1, column 1#
nrow(comp)
#row 2, column 1#
sum(comp$treat)
#row 3, column 1#
sum(comp$compliance[comp$treat==1])

#row 1, column 2#
nrow(comp[comp$block<11,])
#row 2, column 2#
sum(comp$treat[comp$block<11])
#row 3, column 2#
sum(comp$compliance[comp$block<11 & comp$treat==1])

#row 1, column 3#
nrow(comp[ comp$income<3 & (comp$tank==4|comp$refill==0),])
#row 2, column 3#
sum(comp$treat[comp$income<3 & (comp$tank==4|comp$refill==0)])
#row 3, column 3#
sum(comp$compliance[comp$income<3 & (comp$tank==4|comp$refill==0) & comp$treat==1])



######################################################################################
##Table A.5. CACE for Households Receiving Notifications (with covariate adjustment)##
######################################################################################




#drop people who do not sign up. this includes some control group members
comp=subset(full, full$Q501==1)


#find compliers (those who received messages in the treatment group)
comp$compliance=ifelse(comp$Q505_Ndmessages==1 & !(comp$Q505b_SMSfreq==1) ,1,0) #people who receive notifications and receive them NOT rarely 


#create function for generating table rows. Note that different functions need to be created for rows 1, 4,5,and 6. See table footnotes# SEE COMMENTS FOR TABLE 3 ON THIS


#creates row 1

table=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  comp$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  comp$Y=comp$w2 
  
  all=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+as.factor(block)+w1, 
            ~treat+low+kav+everyday+twofour+noreg+tank2+as.factor(block)+w1, data=comp)  
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4]) 
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+as.factor(block)+w1,
                ~treat+low+kav+twofour+noreg+tank2+as.factor(block)+w1, data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4])
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4| comp$refill==0))
  sub2reg=ivreg(Y~compliance+kav+twofour+noreg+as.factor(block)+w1,
                ~treat+kav+twofour+noreg+as.factor(block)+w1, data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4])
  
  out=t(as.data.frame(c(a_,b_,c_)))
  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}



#this function includes wait time as a covariate. Creates rows 2, 3, and 7-11
table1=function(one, two){ 
  options(warn=-1) #suppress warnings
  comp$w1=one 
  comp$w2=two 
  comp$Y=comp$w2
  all=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
            ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4])
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
                ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4])
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4| comp$refill==0))
  sub2reg=ivreg(Y~compliance+kav+twofour+noreg+Q215w1+as.factor(block)+w1,
                ~treat+kav+twofour+noreg+Q215w1+as.factor(block)+w1, data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4]) 
  out=t(as.data.frame(c(a_,b_,c_))) 
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}


#this function calculates single tailed p-values. creates rows 4, 5, and 6
table2=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one 
  comp$w2=two 
  comp$Y=comp$w2
  
  all=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
            ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4]/2)
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1,
                ~treat+low+kav+twofour+noreg+tank2+Q215w1+as.factor(block)+w1, data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4]/2)
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4| comp$refill==0))
  sub2reg=ivreg(Y~compliance+kav+twofour+noreg+Q215w1+as.factor(block)+w1,
                ~treat+kav+twofour+noreg+Q215w1+as.factor(block)+w1, data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4]/2) 
  out=t(as.data.frame(c(a_,b_,c_)))  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}



#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table(comp$Q215w1, comp$Q215w2))
#row 2: missing community events
b=data.frame(table1(comp$Q219w1, comp$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(comp$Q217w1, comp$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(comp$Q220w1, comp$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(comp$Q234w1, comp$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(comp$Q235w1, comp$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(comp$Q249w1, comp$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(comp$Q250w1, comp$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(comp$Q251w1, comp$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(comp$Q238_2w1, comp$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(comp$Q244_2w1, comp$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1=p.all[,1]
tab$p2=p.all[,2]
tab$p3=p.all[,3]
tab

##N###
#row 1, column 1#
nrow(comp)
#row 2, column 1#
sum(comp$treat)
#row 3, column 1#
sum(comp$compliance[comp$treat==1])

#row 1, column 2#
nrow(comp[comp$block<11,])
#row 2, column 2#
sum(comp$treat[comp$block<11])
#row 3, column 2#
sum(comp$compliance[comp$block<11 & comp$treat==1])


#row 1, column 3#
nrow(comp[ comp$income<3 & (comp$tank==4|comp$refill==0),])
#row 2, column 3#
sum(comp$treat[comp$income<3 & (comp$tank==4|comp$refill==0)])
#row 3, column 3#
sum(comp$compliance[comp$income<3 & (comp$tank==4|comp$refill==0) & comp$treat==1])

###############################################################################################
#Table A.6 CACE for Households Receiving Accurate Notifications (without covariate adjustment)#
###############################################################################################





#drop people who do not sign up.
comp=subset(full, full$Q501==1)

#find compliers
comp$compliance=ifelse((comp$Q505_Ndmessages==1 & !(comp$Q505b_SMSfreq==1) & (comp$Q506d_SMSacc<3)),1,0) #people who receive notifications and receive them NOT rarely AND say they are accurate



#creates rows 1-3, 7-11
table1=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  comp$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  comp$Y=comp$w2-comp$w1 #create difference in differences outcome
  Y=comp$Y
  all=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4])
  
  
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~sub1$compliance+as.factor(block),~treat+as.factor(block), data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4])
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4 | comp$refill==0))
  sub2reg=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4])
  out=t(as.data.frame(c(a_,b_,c_)))  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}


#creates rows 1-3, 7-11
table2=function(one, two){
  options(warn=-1) #suppress warnings
  comp$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  comp$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  comp$Y=comp$w2-comp$w1 #create difference in differences outcome
  Y=comp$Y
  all=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=comp)
  vcov=cluster.vcov(all, cluster=comp$bl_cl)
  a=coeftest(all,vcov)
  cm1=mean(comp$w2[comp$treat==0], na.rm=TRUE)
  a_=c(cm1, a[2,1], a[2,2], a[2,4]/2)
  
  comp$block=as.vector(as.numeric(comp$block))
  sub1=subset(comp, comp$block<11)
  sub1reg=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=sub1)
  vcov2=cluster.vcov(sub1reg, cluster=sub1$bl_cl)
  b=coeftest(sub1reg,vcov2)
  cm2=mean(sub1$w2[sub1$treat==0], na.rm=TRUE)
  b_=c(cm2, b[2,1], b[2,2], b[2,4]/2)
  
  comp$income=as.vector(as.numeric(comp$income))
  sub2=subset(comp, comp$income<3 & (comp$tank==4 | comp$refill==0))
  sub2reg=ivreg(Y~compliance+as.factor(block),~treat+as.factor(block), data=sub2)
  vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
  c=coeftest(sub2reg,vcov3)
  cm3=mean(sub2$w2[sub2$treat==0], na.rm=TRUE)
  c_=c(cm3, c[2,1], c[2,2], c[2,4]/2)
  out=t(as.data.frame(c(a_,b_,c_)))  
  colnames(out)=c("overall.ctrlmean", "overall.b", "overall.se", "p1","low.income.block.ctrl.mean", "low.income.block.b", "low.income.block.se", "p2","targ.ctrlmean", "targ.b", "targ.se", "p3")
  rownames(out)=c(" ")
  return(out)
}



#GENERATE OUTPUT#

#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table1(comp$Q215w1, comp$Q215w2))
#row 2: missing community events
b=data.frame(table1(comp$Q219w1, comp$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(comp$Q217w1, comp$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(comp$Q220w1, comp$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(comp$Q234w1, comp$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(comp$Q235w1, comp$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(comp$Q249w1, comp$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(comp$Q250w1, comp$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(comp$Q251w1, comp$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(comp$Q238_2w1, comp$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(comp$Q244_2w1, comp$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1=p.all[,1]
tab$p2=p.all[,2]
tab$p3=p.all[,3]
tab

##N###
#row 1, column 1#
nrow(comp)
#row 2, column 1#
sum(comp$treat)
#row 3, column 1#
sum(comp$compliance[comp$treat==1])

#row 1, column 2#
nrow(comp[comp$block<11,])
#row 2, column 2#
sum(comp$treat[comp$block<11])
#row 3, column 2#
sum(comp$compliance[comp$block<11 & comp$treat==1])

#row 1, column 3#
nrow(comp[ comp$income<3 & (comp$tank==4|comp$refill==0),])
#row 2, column 3#
sum(comp$treat[comp$income<3 & (comp$tank==4|comp$refill==0)])
#row 3, column 3#
sum(comp$compliance[comp$income<3 & (comp$tank==4|comp$refill==0) & comp$treat==1])

#############################################################################################################################
#Table A.7. Average Treatment Effects for Valve Areas Receiving Accurate Notifications (Defined Based on Water Arrival Time)#
#############################################################################################################################
#P-values may not be exact due to the fact that we are using permutation inference. 

v=read.csv("Accuracy.csv") #load in data. dataset assesses discrepancies between household reported water arrival and valvemen water arrival reports.
#households report arrival times in two different ways- Q206 is a response to "on which day does your water arrive", and Q209 is a response to "at what time does your water arrive"
#See table footnote for more information
valvenames=unique(as.vector(v$Valve.Area))
Q209=data.frame(matrix(data = NA, nrow=length(valvenames), ncol=3))
Q209[,1]=valvenames
colnames(Q209)=c('VA.Name', "Q209Mean", "Q209Mode")

#function that finds the mode of a vector
mode_ <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

for (k in 1: length(valvenames)){
    VAuse=subset(v, v$Valve.Area==valvenames[k]) 
  Q209[k,2]=mean(VAuse$Q209Discrp, na.rm=TRUE)
  if (sum(!is.na(VAuse$Q209Discrp))>0) Q209[k,3]=mode_(VAuse$Q209Discrp[!is.na(VAuse$Q209Discrp)])
} 
Q209$Q209Mode=as.numeric(Q209$Q209Mode) #find the modal discrepancy (per valve area)between VM reports and household responses to
#at what time does your water arrive


valves=read.csv('valveareas.csv')
d=merge(full,valves, by="HHid" ) #merge data and dataset placing households in valve areas
d=merge(d,Q209, by="VA.Name") #merge dataset and mode calculated above by valve area




perms=genperms(Z=d$treat, blockvar=d$block, clustvar=d$bl_cl, maxiter=1000) #create a dataset with the the treatment permutations bound to it


d=as.data.frame(cbind(d, perms))


#creates rows 1-3, 7-11
table1=function(one, two){
  options(warn=-1) #suppress warnings
  dat$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  dat$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  dat=dat[complete.cases(dat[,c(1063,1064)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables. 
  
  Y= dat$w2
  ate1=estate(Y=Y, Z=dat$treat) #calculate ATE
  gen=as.matrix(dat[,(63:1062)]) #select the 1000 columns with the treatment permutations
  h.ate=genouts(Y=Y, Z=dat$treat, ate=0) #create a null hypothesis treatment effect of 0
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL) #create 1000 potential outcomes with a treatment effect of 0
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975)) 
  p = p$two.tailed.p.value #get p values
  cm1=mean(dat$w2[dat$treat==0], na.rm=TRUE) #get baseline mean
  a_=c(cm1, ate1, p,1) #get baseline mean, ate, and p
  
  bl_1=subset(dat, dat$block<11) #low income blocks
  Y=bl_1$w2
  ate1=estate(Y=Y, Z=bl_1$treat)
  gen=as.matrix(bl_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=bl_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(dat, dat$income<3 &  (dat$tank==4| dat$refill==0)) #target population. low income without tanks or without automatically refilling tanks
  Y=ta_1$w2
  ate1=estate(Y=Y, Z=ta_1$treat)
  gen=as.matrix(ta_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=ta_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
  rownames(out)=c(" ")
  return(out)
}



#creates rows 4-6. one tailed p-values
table2=function(one, two){
  options(warn=-1) #suppress warnings
  dat$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  dat$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  
  dat=dat[complete.cases(dat[,c(1063,1064)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables.  
  Y= dat$w2
  ate1=estate(Y=Y, Z=dat$treat)
  gen=as.matrix(dat[,(63:1062)])
  h.ate=genouts(Y=Y, Z=dat$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm1=mean(dat$w2[dat$treat==0], na.rm=TRUE)
  a_=c(cm1, ate1, p,1)
  
  bl_1=subset(dat, dat$block<11)
  Y=bl_1$w2
  ate1=estate(Y=Y, Z=bl_1$treat)
  gen=as.matrix(bl_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=bl_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(dat, dat$income<3 & (dat$tank==4| dat$refill==0))
  Y=ta_1$w2
  ate1=estate(Y=Y, Z=ta_1$treat)
  gen=as.matrix(ta_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=ta_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
  return(out)
  rownames(out)=c(" ")
}




dat=subset(d, Q209Mode<1 & Q209Mode>-2 )

#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table1(dat$Q215w1, dat$Q215w2))
#row 2: missing community events
b=data.frame(table1(dat$Q219w1, dat$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(dat$Q217w1, dat$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(dat$Q220w1, dat$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(dat$Q234w1, dat$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(dat$Q235w1, dat$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(dat$Q249w1, dat$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(dat$Q250w1, dat$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(dat$Q251w1, dat$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(dat$Q238_2w1, dat$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(dat$Q244_2w1, dat$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1_=p.all[,1]
tab$p2_=p.all[,2]
tab$p3_=p.all[,3]
tab





##N###
#row 1, column 1#
nrow(dat)
#row 2, column 1#
sum(dat$treat)


#row 1, column 2#
nrow(dat[dat$block<11,])
#row 2, column 2#
sum(dat$treat[dat$block<11])
#row 1, column 3#
nrow(dat[ dat$income<3 & (dat$tank==4|dat$refill==0),])
#row 2, column 3#
sum(dat$treat[dat$income<3 & (dat$tank==4|dat$refill==0)])


##############################################################################################################################
#Table A.8. Average Treatment Effects for Valve Areas Receiving Accurate Notifications (Defined Based on Day of Water Supply)#
##############################################################################################################################
#P-values may not be exact due to the fact that we are using permutation inference
v=read.csv("Accuracy.csv") #load in data. dataset assesses discrepancies between household reported water arrival and valvemen water arrival reports.
#households report arrival times in two different ways- Q206 is a response to "on which day does your water arrive", and Q209 is a response to "at what time does your water arrive"
#See table footnote for more information
valvenames=unique(as.vector(v$Valve.Area))
Q206=data.frame(matrix(data = NA, nrow=length(valvenames), ncol=3))
Q206[,1]=valvenames
colnames(Q206)=c('VA.Name', "Q206Mean", "Q206Mode")

#function that finds the mode of a vector
mode_ <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

for (k in 1: length(valvenames)){
  
  VAuse=subset(v, v$Valve.Area==valvenames[k])
  Q206[k,2]=mean(VAuse$Q206_discrepancy, na.rm=TRUE)
  if (sum(!is.na(VAuse$Q206_discrepancy))>0) Q206[k,3]=mode_(VAuse$Q206_discrepancy[!is.na(VAuse$Q206_discrepancy)])
} 


Q206$Q206Mode=as.numeric(Q206$Q206Mode) #find the modal discrepancy (per valve area)between VM reports and household responses to
#on what day does your water arrive


valves=read.csv('valveareas.csv')
d=merge(full,valves, by="HHid" ) #merge data and dataset placing households in valve areas
d=merge(d,Q206, by="VA.Name") #merge dataset and mode calculated above by valve area




perms=genperms(Z=d$treat, blockvar=d$block, clustvar=d$bl_cl, maxiter=1000) #create a dataset with the the treatment permutations bound to it


d=as.data.frame(cbind(d, perms))




#creates rows 1-3, 7-11
v=read.csv("Accuracy.csv") #load in data. dataset assesses discrepancies between household reported water arrival and valvemen water arrival reports.
#households report arrival times in two different ways- Q206 is a response to "on which day does your water arrive", and Q209 is a response to "at what time does your water arrive"
#See table footnote for more information
valvenames=unique(as.vector(v$Valve.Area))
Q209=data.frame(matrix(data = NA, nrow=length(valvenames), ncol=3))
Q209[,1]=valvenames
colnames(Q209)=c('VA.Name', "Q209Mean", "Q206Mode")

#function that finds the mode of a vector
mode_ <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

for (k in 1: length(valvenames)){
  VAuse=subset(v, v$Valve.Area==valvenames[k]) 
  Q206[k,2]=mean(VAuse$Q206Discrp, na.rm=TRUE)
  if (sum(!is.na(VAuse$Q206Discrp))>0) Q206[k,3]=mode_(VAuse$Q209Discrp[!is.na(VAuse$Q209Discrp)])
} 
Q206$Q206Mode=as.numeric(Q206$Q206Mode) #find the modal discrepancy (per valve area)between VM reports and household responses to
#at what time does your water arrive


valves=read.csv('valveareas.csv')
d=merge(full,valves, by="HHid" ) #merge data and dataset placing households in valve areas
d=merge(d,Q206, by="VA.Name") #merge dataset and mode calculated above by valve area




perms=genperms(Z=d$treat, blockvar=d$block, clustvar=d$bl_cl, maxiter=1000) #create a dataset with the the treatment permutations bound to it


d=as.data.frame(cbind(d, perms))


#creates rows 1-3, 7-11
table1=function(one, two){
  options(warn=-1) #suppress warnings
  dat$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  dat$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  dat=dat[complete.cases(dat[,c(1063,1064)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables. 
  
  Y= dat$w2
  ate1=estate(Y=Y, Z=dat$treat) #calculate ATE
  gen=as.matrix(dat[,(63:1062)]) #select the 1000 columns with the treatment permutations
  h.ate=genouts(Y=Y, Z=dat$treat, ate=0) #create a null hypothesis treatment effect of 0
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL) #create 1000 potential outcomes with a treatment effect of 0
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975)) 
  p = p$two.tailed.p.value #get p values
  cm1=mean(dat$w2[dat$treat==0], na.rm=TRUE) #get baseline mean
  a_=c(cm1, ate1, p,1) #get baseline mean, ate, and p
  
  bl_1=subset(dat, dat$block<11) #low income blocks
  Y=bl_1$w2
  ate1=estate(Y=Y, Z=bl_1$treat)
  gen=as.matrix(bl_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=bl_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(dat, dat$income<3 &  (dat$tank==4| dat$refill==0)) #target population. low income without tanks or without automatically refilling tanks
  Y=ta_1$w2
  ate1=estate(Y=Y, Z=ta_1$treat)
  gen=as.matrix(ta_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=ta_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.025, 0.975))
  p = p$two.tailed.p.value
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
  rownames(out)=c(" ")
  return(out)
}



#creates rows 4-6. one tailed p-values
table2=function(one, two){
  options(warn=-1) #suppress warnings
  dat$w1=one #creates a column with the wave 1 outcome of interest and labels it as w1
  dat$w2=two #creates a column with the wave 2 outcome of interest and labels it as w2
  
  dat=dat[complete.cases(dat[,c(1063,1064)]),] #use only cases for which we don't have NAs for the outcomes. Columns 1058 and 1059 are the outcome variables.  
  Y= dat$w2
  ate1=estate(Y=Y, Z=dat$treat)
  gen=as.matrix(dat[,(63:1062)])
  h.ate=genouts(Y=Y, Z=dat$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm1=mean(dat$w2[dat$treat==0], na.rm=TRUE)
  a_=c(cm1, ate1, p,1)
  
  bl_1=subset(dat, dat$block<11)
  Y=bl_1$w2
  ate1=estate(Y=Y, Z=bl_1$treat)
  gen=as.matrix(bl_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=bl_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm2=mean(bl_1$w2[bl_1$treat==0], na.rm=TRUE)
  b_=c(cm2, ate1, p,1)
  
  ta_1=subset(dat, dat$income<3 & (dat$tank==4| dat$refill==0))
  Y=ta_1$w2
  ate1=estate(Y=Y, Z=ta_1$treat)
  gen=as.matrix(ta_1[,(63:1062)])
  h.ate=genouts(Y=Y, Z=ta_1$treat, ate=0)
  perm=gendist(Ys=h.ate, perms=gen, prob=NULL)
  p=dispdist(perm, ate1, quantiles = c(0.00, 0.95))
  if (ate1<0) {
    p = p$lesser.p.value
  } else {
    p = p$greater.p.value
  }
  cm3=mean(ta_1$w2[ta_1$treat==0], na.rm=TRUE)
  c_=c(cm3, ate1, p,1)
  out=t(as.data.frame(c(a_,b_,c_)))
  colnames(out)=c("overall.control.mean", "overall.ate", "p1", "p1_", "low.income.block.ctrlmean", "low.income.block.ate", "p2", "p2_","targ.ctrlmean", "targ.ate", "p3","p3_")
  return(out)
  rownames(out)=c(" ")
}





dat=subset(d, Q206Mode==0) 

#GENERATE OUTPUT#
#row 1: time spent waiting for water
a=data.frame(table1(dat$Q215w1, dat$Q215w2))
#row 2: missing community events
b=data.frame(table1(dat$Q219w1, dat$Q219w2))
#row 3: hours of work missed
c=data.frame(table1(dat$Q217w1, dat$Q217w2))
#row 4: need for substitutes
d.=data.frame(table2(dat$Q220w1, dat$Q220bw2))

#Multiple testing adjustments for first group
hhwe=as.data.frame(c(a$p1, a$p2, a$p3, b$p1, b$p2, b$p3, c$p1, c$p2, c$p3, d.$p1, d.$p2, d.$p3))
rownames(hhwe)=c('a$p1', 'a$p2', 'a$p3', 'b$p1', 'b$p2', 'b$p3', 'c$p1', 'c$p2', 'c$p3', 'd.$p1', 'd.$p2', 'd.$p3')
p.1=((p.adjust(hhwe[,1], method="BH")))
p.1=data.frame((matrix(p.1, ncol=3, byrow=TRUE)))


#row 5: worrying about water
e=data.frame(table2(dat$Q234w1, dat$Q234w2))
#row 6: thinking about water during the day
f=data.frame(table2(dat$Q235w1, dat$Q235w2))

#Multiple testing adjustments for second group
we=as.data.frame(c(e$p1,e$p2,e$p3,f$p1,f$p2,f$p3))
rownames(we)=c('e$p1','e$p2','e$p3','f$p1','f$p2','f$p3')
p.2=((p.adjust(we[,1], method="BH")))
p.2=data.frame(matrix(p.2, ncol=3, byrow=TRUE))


#row 7: perception that providers are competent
g=data.frame(table1(dat$Q249w1, dat$Q249w2))
#row 8: perception that providers are innovative and modern
h=data.frame(table1(dat$Q250w1, dat$Q250w2))
#row 9: perception that providers care about people like us
i=data.frame(table1(dat$Q251w1, dat$Q251w2))

#Multiple testing adjustments for third group
pe=as.data.frame(c(g$p1, g$p2, g$p3, h$p1, h$p2, h$p3, i$p1, i$p2, i$p3))
rownames(pe)=c('g$p1', 'g$p2', 'g$p3', 'h$p1', 'h$p2', 'h$p3', 'i$p1', 'i$p2', 'i$p3')
p.3=((p.adjust(pe[,1], method="BH")))
p.3=data.frame(matrix(p.3, nrow=3, byrow=TRUE))



#row 10: contacting providers directly about problems with service
j=data.frame(table1(dat$Q238_2w1, dat$Q238_2w2))
#row 11 : Holding state water utility directly responsible for service
k=data.frame(table1(dat$Q244_2w1, dat$Q244_2w2))

#Multiple testing adjustments for fourth group
ce=as.data.frame(c(j$p1,j$p2,j$p3,k$p1,k$p2,k$p3))
rownames(ce)=c('j$p1','j$p2','j$p3','k$p1','k$p2','k$p3')
p.4=as.data.frame(p.adjust(ce[,1], method="BH"))
p.4=((p.adjust(ce[,1], method="BH")))
p.4=data.frame(matrix(p.4, ncol=3, byrow=TRUE))


tab=as.data.frame(rbind(a,b,c,d.,e,f,g,h,i,j,k))
p.all=rbind(p.1,p.2,p.3,p.4)
tab$p1_=p.all[,1]
tab$p2_=p.all[,2]
tab$p3_=p.all[,3]
tab





##N###
#row 1, column 1#
nrow(dat)
#row 2, column 1#
sum(dat$treat)


#row 1, column 2#
nrow(dat[dat$block<11,])
#row 2, column 2#
sum(dat$treat[dat$block<11])
#row 1, column 3#
nrow(dat[ dat$income<3 & (dat$tank==4|dat$refill==0),])
#row 2, column 3#
sum(dat$treat[dat$income<3 & (dat$tank==4|dat$refill==0)])




###################################################################
#Table A.9. Rates of Compliance According to Different Definitions#
###################################################################

#GENERATE OUTPUT#

#Row 1- all columns
sum(full$treat)

#Row 2, Column 1
floor(.80*sum(full$treat))

#Row 3, Column 1
.80 ##From PAP

#Row 2, Column 2- HH enrolling in ND
sum(full$treat[full$Q501==1])

#Row 3, Column 2- HH enrolling in ND
sum(full$treat[full$Q501==1])/sum(full$treat)

#Row 2, Column 3- HH enrolling in ND and receiving notifications
sum(full$treat[full$Q501==1 & full$Q505_Ndmessages==1 & !(full$Q505b_SMSfreq==1)])

#Row 3, Column 3-HH enrolling in ND and receiving notifications
sum(full$treat[full$Q501==1 & full$Q505_Ndmessages==1 & !(full$Q505b_SMSfreq==1)])/sum(full$treat)


#Row 2, Column 4 -HH enrolling in ND and receiving accurate notifications
sum(full$treat[full$Q501==1 & full$Q505_Ndmessages==1 & !(full$Q505b_SMSfreq==1) & full$Q506d_SMSacc<3])

#Row 3, Column 4-HH enrolling in ND and receiving accurate notifications
sum(full$treat[full$Q501==1 & full$Q505_Ndmessages==1 & !(full$Q505b_SMSfreq==1) & full$Q506d_SMSacc<3])/sum(full$treat)

################################################################################################
#Table A.10 Power of experiment to detect CACE effects at 5% significance among the target group#
################################################################################################
#Load in all necessary data

v=read.csv("Accuracy.csv") #load in data. dataset assesses discrepancies between household reported water arrival and valvemen water arrival reports.
#households report arrival times in two different ways- Q206 is a response to "on which day does your water arrive", and Q209 is a response to "at what time does your water arrive"
#See table footnote for more information
valvenames=unique(as.vector(v$Valve.Area))
Q209=data.frame(matrix(data = NA, nrow=length(valvenames), ncol=3))
Q209[,1]=valvenames
colnames(Q209)=c('VA.Name', "Q209Mean", "Q209Mode")

#function that finds the mode of a vector
mode_ <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

for (k in 1: length(valvenames)){
  VAuse=subset(v, v$Valve.Area==valvenames[k]) 
  Q209[k,2]=mean(VAuse$Q209Discrp, na.rm=TRUE)
  if (sum(!is.na(VAuse$Q209Discrp))>0) Q209[k,3]=mode_(VAuse$Q209Discrp[!is.na(VAuse$Q209Discrp)])
} 
Q209$Q209Mode=as.numeric(Q209$Q209Mode) #find the modal discrepancy (per valve area)between VM reports and household responses to
#at what time does your water arrive


valves=read.csv('valveareas.csv')
d=merge(full,valves, by="HHid" ) #merge data and dataset placing households in valve areas
d=merge(d,Q209, by="VA.Name") #merge dataset and mode calculated above by valve area

v=read.csv("Accuracy.csv") #load in data. dataset assesses discrepancies between household reported water arrival and valvemen water arrival reports.
#households report arrival times in two different ways- Q206 is a response to "on which day does your water arrive", and Q209 is a response to "at what time does your water arrive"
#See table footnote for more information
valvenames=unique(as.vector(v$Valve.Area))
Q206=data.frame(matrix(data = NA, nrow=length(valvenames), ncol=3))
Q206[,1]=valvenames
colnames(Q206)=c('VA.Name', "Q206Mean", "Q206Mode")

#function that finds the mode of a vector
mode_ <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

for (k in 1: length(valvenames)){
  
  VAuse=subset(v, v$Valve.Area==valvenames[k])
  Q206[k,2]=mean(VAuse$Q206_discrepancy, na.rm=TRUE)
  if (sum(!is.na(VAuse$Q206_discrepancy))>0) Q206[k,3]=mode_(VAuse$Q206_discrepancy[!is.na(VAuse$Q206_discrepancy)])
} 


Q206$Q206Mode=as.numeric(Q206$Q206Mode) #find the modal discrepancy (per valve area)between VM reports and household responses to
#on what day does your water arrive


d=merge(d,Q206, by="VA.Name") #merge dataset and mode calculated above by valve area





#find compliers


#drop people who do not sign up. this includes some control group members
comp=subset(d, d$Q501==1)


comp$compliance=ifelse(comp$Q505_Ndmessages==1 & !(comp$Q505b_SMSfreq==1),1,0) #people who receive notifications and receive them NOT rarely


#find compliers with accurate messages
comp$compliance2=ifelse((comp$Q505_Ndmessages==1 & !(comp$Q505b_SMSfreq==1) & (comp$Q506d_SMSacc<3)),1,0) #people who receive notifications and receive them NOT rarely and say the messages are accurate


###assign the number of times people get water per week
comp$days[comp$supply==1]=7
comp$days[comp$supply==2]=7/2
comp$days[comp$supply==3]=7/3.5
comp$days[comp$supply==4]=7/4.5
comp$days[comp$supply==5]=7/6.5
comp$days[comp$supply>5]=NA ##end up dropping people who don't know or who have no regularity

#create dataframe for function
Y_0=comp$Q215w1 #only using wait time as an outcome
pow=cbind.data.frame(Y_0, comp$bl_cl, comp$treat, comp$block, 
                     comp$income, comp$tank, comp$compliance, comp$kaveri,
                     comp$supply, comp$compliance2, comp$Q206Mode, comp$Q209Mode, comp$refill, comp$days,
                     comp$low, comp$kav, comp$twofour, comp$noreg, comp$tank2)


colnames(pow)=c("Y_0", "bl_cl", "treat", "block", "income", "tank", "compliance", "kaveri", "supply", "compliance2", "Q206Mode", 
                "Q209Mode", "refill", "days", "low","kav","twofour","noreg","tank2")



#create a function where you can specify the ICC, SD, and ATE. This gives power for CACE only without covariate adjustment
power_CACE=function(ICC, SD, ATE_, dim){
  pow$ATE=ATE_/pow$days
 #create empty vector to store p-values in 
  p_targ=vector()
  for (j in 1:dim){ #for every simulated run of the experiment
    pow$sd=rnorm(nrow(pow),0, SD) #create sample residual error
    C_SD=sqrt((ICC*(SD^2))/(1-ICC)) #create cluster level SD from ICC. Derived from ICC formula. see ICC formula here : http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1466680/
    #create cluster level errors

    clust=as.vector(unique(pow$bl_cl))
    
    pow2=subset(pow, pow$bl_cl==clust[1])
    pow2$clust_sd=rnorm(nrow(pow2),0, C_SD) #create dataframe with cluster level errors for for one cluster, and then bind all of the others to this
    
    for (i in 2:length(clust)){
      
      cluster=subset(pow, pow$bl_cl==clust[i])
      cluster$clust_sd=rnorm(nrow(cluster),0, C_SD)
      pow2=rbind.data.frame(pow2,cluster)}
    pow=pow2
    
    ##create potential outcomes. For this whole code, we assume that the way we measure compliance is the way that treatment effects are created. 
    pow$Y_1=pow$Y_0+pow$compliance*pow$ATE+pow$sd+pow$clust_sd #compliance indicator (rather than treatment indicator) delivers treatment effect
    pow$Y=pow$Y_1 #measured treatment effect
    #calculate CACE for target subgroup
    pow$income=as.vector(as.numeric(pow$income))
    sub2=subset(pow, pow$income<3 & (pow$tank==4| pow$refill==0)) #target group
    sub2reg=ivreg(Y~compliance+ as.factor(block)+Y_0,~treat+as.factor(block)+Y_0, data=sub2)
    vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
    c=coeftest(sub2reg,vcov3)
    p_targ[j]= c[2,4]
  }
  pow_targ=length(p_targ[p_targ<.05])/dim
  out= pow_targ
  return(out)
}



##function for covariate adjustment

power_CACE_cov=function(ICC, SD, ATE_, dim){
  pow$ATE=ATE_/pow$days

  p_targ=vector()
  for (j in 1:dim){
    pow$sd=rnorm(nrow(pow),0, SD) #create sample residual error
    C_SD=sqrt((ICC*(SD^2))/(1-ICC)) #create cluster level SD from ICC
    #create cluster level errors
    
    clust=as.vector(unique(pow$bl_cl))
    
    pow2=subset(pow, pow$bl_cl==clust[1])
    pow2$clust_sd=rnorm(nrow(pow2),0, C_SD)
    
    for (i in 2:length(clust)){      
      cluster=subset(pow, pow$bl_cl==clust[i])
      cluster$clust_sd=rnorm(nrow(cluster),0, C_SD)
      pow2=rbind.data.frame(pow2,cluster)}
    pow=pow2
    
    ##create potential outcomes
    pow$Y_1=pow$Y_0+pow$compliance*pow$ATE+pow$sd+pow$clust_sd
    
    pow$Y=pow$Y_1
    #calculate CACE 
     
    pow$income=as.vector(as.numeric(pow$income))
    sub2=subset(pow, pow$income<3 & (pow$tank==4| pow$refill==0))
    sub2reg=ivreg(Y~compliance+kav+twofour+noreg+as.factor(block)+Y_0,
                  ~treat+kav+twofour+noreg+as.factor(block)+Y_0, data=sub2)
    vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
    c=coeftest(sub2reg,vcov3)
    p_targ[j]= c[2,4]
  }

  pow_targ=length(p_targ[p_targ<.05])/dim
  out=pow_targ
  return(out)
}



##function for those with accurate notifications

power_CACE_acc=function(ICC, SD, ATE_, dim){
  pow$ATE=ATE_/pow$days

  p_targ=vector()
  for (j in 1:dim){
    pow$sd=rnorm(nrow(pow),0, SD) #create sample residual error
    C_SD=sqrt((ICC*(SD^2))/(1-ICC)) #create cluster level SD from ICC
    #create cluster level errors
    clust=as.vector(unique(pow$bl_cl))
    
    pow2=subset(pow, pow$bl_cl==clust[1])
    pow2$clust_sd=rnorm(nrow(pow2),0, C_SD)
    
    for (i in 2:length(clust)){
      
      cluster=subset(pow, pow$bl_cl==clust[i])
      cluster$clust_sd=rnorm(nrow(cluster),0, C_SD)
      pow2=rbind.data.frame(pow2,cluster)}
    pow=pow2
    
    
    ##create potential outcomes
    pow$Y_1=pow$Y_0+pow$compliance2*pow$ATE+pow$sd+pow$clust_sd
    pow$Y=pow$Y_1
    #calculate CACE for each subgroup
      pow$income=as.vector(as.numeric(pow$income))
    sub2=subset(pow, pow$income<3 & (pow$tank==4| pow$refill==0))
    sub2reg=ivreg(Y~compliance2+as.factor(block)+Y_0,~treat+as.factor(block)+Y_0, data=sub2)
    vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
    c=coeftest(sub2reg,vcov3)
    p_targ[j]= c[2,4]
  }
  pow_targ=length(p_targ[p_targ<.05])/dim
  out=pow_targ
  return(out)
}





#with accuracy subset and covariates


power_CACE_cov_acc=function(ICC, SD, ATE_, dim){
  pow$ATE=ATE_/pow$days

  p_targ=vector()
  for (j in 1:dim){
    pow$sd=rnorm(nrow(pow),0, SD) #create sample residual error
    C_SD=sqrt((ICC*(SD^2))/(1-ICC)) #create cluster level SD from ICC
    #create cluster level errors
    
    clust=as.vector(unique(pow$bl_cl))
    
    pow2=subset(pow, pow$bl_cl==clust[1])
    pow2$clust_sd=rnorm(nrow(pow2),0, C_SD)
    
    for (i in 2:length(clust)){      
      cluster=subset(pow, pow$bl_cl==clust[i])
      cluster$clust_sd=rnorm(nrow(cluster),0, C_SD)
      pow2=rbind.data.frame(pow2,cluster)}
    pow=pow2
    
    ##create potential outcomes
    
    pow$Y_1=pow$Y_0+pow$compliance2*pow$ATE+pow$sd+pow$clust_sd
    pow$Y=pow$Y_1
    #calculate CACE 
       sub2=subset(pow, pow$income<3 & (pow$tank==4| pow$refill==0))
    sub2reg=ivreg(Y~compliance2+kav+twofour+noreg+as.factor(block)+Y_0,
                  ~treat+kav+twofour+noreg+as.factor(block)+Y_0, data=sub2)
    vcov3=cluster.vcov(sub2reg, cluster=sub2$bl_cl)
    c=coeftest(sub2reg,vcov3)
    p_targ[j]= c[2,4]
  }
  
  pow_targ=length(p_targ[p_targ<.05])/dim
  out=pow_targ
  return(out)
}

#GENERATE OUTPUT#

#row 1
row1=as.vector(rep(NA,4))
row1[1]=power_CACE(.01, 1, -.5, 1000)
row1[2]=power_CACE(.15, 1, -.5, 1000)
row1[3]=power_CACE(.01, 2, -2, 1000)
row1[4]=power_CACE(.15, 2, -2, 1000)
range(row1)

#row2
row2=as.vector(rep(NA,4))
row2[1]=power_CACE_cov(.01, 1, -.5, 1000)
row2[2]=power_CACE_cov(.15, 1, -.5, 1000)
row2[3]=power_CACE_cov(.01, 2, -2, 1000)
row2[4]=power_CACE_cov(.15, 2, -2, 1000)
range(row2)

#row3
row3=as.vector(rep(NA,4))
row3[1]=power_CACE_acc(.01, 1, -.5, 1000)
row3[2]=power_CACE_acc(.15, 1, -.5, 1000)
row3[3]=power_CACE_acc(.01, 2, -2, 1000)
row3[4]=power_CACE_acc(.15, 2, -2, 1000)
range(row3)
#row4
row4=as.vector(rep(NA,4))
row4[1]=power_CACE_cov_acc(.01, 1, -.5, 1000)
row4[2]=power_CACE_cov_acc(.15, 1, -.5, 1000)
row4[3]=power_CACE_cov_acc(.01, 2, -2, 1000)
row4[4]=power_CACE_cov_acc(.15, 2, -2, 1000)
range(row4)

